{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "49277df7",
   "metadata": {},
   "source": [
    "## Select by length\n",
    "\n",
    "此示例选择器根据长度选择要使用的示例。当您担心构建的提示会超过上下文窗口的长度时，这非常有用。对于较长的输入，它将选择较少的示例来包含，而对于较短的输入，它将选择更多的示例。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "f242efe8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.prompts import FewShotPromptTemplate, PromptTemplate\n",
    "from langchain.prompts.example_selector import LengthBasedExampleSelector\n",
    "\n",
    "# 创建一个反义词的任务示例\n",
    "examples = [\n",
    "    {\"input\": \"开心\", \"output\": \"伤心\"},\n",
    "    {\"input\": \"高\", \"output\": \"矮\"},\n",
    "    {\"input\": \"精力充沛\", \"output\": \"没精打采\"},\n",
    "    {\"input\": \"粗\", \"output\": \"细\"},\n",
    "]\n",
    "\n",
    "example_prompt = PromptTemplate(\n",
    "    input_variables=[\"input\", \"output\"],\n",
    "    template=\"Input: {input}\\nOutput: {output}\",\n",
    ")\n",
    "example_selector = LengthBasedExampleSelector(\n",
    "    # 可供选择的示例。\n",
    "    examples=examples,\n",
    "    # PromptTemplate 用于格式化示例。\n",
    "    example_prompt=example_prompt,\n",
    "    # 格式化示例的最大长度。\n",
    "    # 长度由下面的 get_text_length 函数测量。\n",
    "    max_length=25,\n",
    "    # 用于获取字符串长度的函数，使用\n",
    "    # 确定要包含哪些示例。被注释掉是因为\n",
    "    # 如果未指定，则将其作为默认值提供。\n",
    "    # get_text_length: Callable[[str], int] = lambda x: len(re.split(\"\\n| \", x))\n",
    ")\n",
    "dynamic_prompt = FewShotPromptTemplate(\n",
    "    # 我们提供了一个ExampleSelector而不是示例。\n",
    "    example_selector=example_selector,\n",
    "    example_prompt=example_prompt,\n",
    "    prefix=\"给出每个输入的反义词\",\n",
    "    suffix=\"Input: {adjective}\\nOutput:\",\n",
    "    input_variables=[\"adjective\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "9af99801",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "给出每个输入的反义词\n",
      "\n",
      "Input: 开心\n",
      "Output: 伤心\n",
      "\n",
      "Input: 高\n",
      "Output: 矮\n",
      "\n",
      "Input: 精力充沛\n",
      "Output: 没精打采\n",
      "\n",
      "Input: 粗\n",
      "Output: 细\n",
      "\n",
      "Input: big\n",
      "Output:\n"
     ]
    }
   ],
   "source": [
    "#示例输入量较小，因此选择所有示例。\n",
    "print(dynamic_prompt.format(adjective=\"big\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "45a1d3de",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "给出每个输入的反义词\n",
      "\n",
      "Input: 开心\n",
      "Output: 伤心\n",
      "\n",
      "Input: big and huge and massive and large and gigantic and tall and much much much much much bigger than everything else\n",
      "Output:\n"
     ]
    }
   ],
   "source": [
    "# 示例输入较长，因此仅选择一个示例。\n",
    "long_string = \"big and huge and massive and large and gigantic and tall and much much much much much bigger than everything else\"\n",
    "print(dynamic_prompt.format(adjective=long_string))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "337a135a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "给出每个输入的反义词\n",
      "\n",
      "Input: 开心\n",
      "Output: 伤心\n",
      "\n",
      "Input: 高\n",
      "Output: 矮\n",
      "\n",
      "Input: 精力充沛\n",
      "Output: 没精打采\n",
      "\n",
      "Input: 粗\n",
      "Output: 细\n",
      "\n",
      "Input: 胖\n",
      "Output: 瘦\n",
      "\n",
      "Input: 热情\n",
      "Output:\n"
     ]
    }
   ],
   "source": [
    "# 您也可以将示例添加到示例选择器。\n",
    "new_example = {\"input\": \"胖\", \"output\": \"瘦\"}\n",
    "dynamic_prompt.example_selector.add_example(new_example)\n",
    "print(dynamic_prompt.format(adjective=\"热情\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "14a8e4ac",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'开心的反义词是悲伤。\\n高的反义词是矮小。\\n精力充沛的反义词是精疲力竭。\\n热的反义词是冷。\\n大的反义词是小。'"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "from langchain_openai import ChatOpenAI\n",
    "openai_api_key = \"EMPTY\"\n",
    "openai_api_base = \"http://127.0.0.1:1234/v1\"\n",
    "chat = ChatOpenAI(\n",
    "    openai_api_key=openai_api_key,\n",
    "    openai_api_base=openai_api_base,\n",
    "    temperature=0.7,\n",
    ")\n",
    "\n",
    "output_parser = StrOutputParser()\n",
    "\n",
    "chain = dynamic_prompt | chat | output_parser\n",
    "\n",
    "chain.invoke({\"adjective\":\"热情\"})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "26c5d479",
   "metadata": {
    "tags": []
   },
   "source": [
    "## 最大余弦相似度的嵌入示例\n",
    "MaxMarginalRelevanceExampleSelector 根据与输入最相似的示例组合来选择示例，同时还针对多样性进行优化。它通过查找与输入具有最大余弦相似度的嵌入示例来实现这一点，然后迭代地添加它们，同时惩罚它们与已选择示例的接近程度。\n",
    "\n",
    "```\n",
    "pip install sentence-transformers\n",
    "pip install faiss-cpu\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5100039d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "9bd467aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.prompts import FewShotPromptTemplate, PromptTemplate\n",
    "from langchain.prompts.example_selector import (\n",
    "    MaxMarginalRelevanceExampleSelector,\n",
    "    SemanticSimilarityExampleSelector,\n",
    ")\n",
    "from langchain_community.vectorstores import FAISS\n",
    "from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings\n",
    "\n",
    "embeddings_path = \"D:\\\\ai\\\\download\\\\bge-large-zh-v1.5\"\n",
    "embeddings = HuggingFaceEmbeddings(model_name=embeddings_path)\n",
    "\n",
    "example_prompt = PromptTemplate(\n",
    "    input_variables=[\"input\", \"output\"],\n",
    "    template=\"Input: {input}\\nOutput: {output}\",\n",
    ")\n",
    "\n",
    "# 创建反义词的假装任务的示例。\n",
    "examples = [\n",
    "    {\"input\": \"高\", \"output\": \"矮\"},\n",
    "    {\"input\": \"精力充沛\", \"output\": \"没精打采\"},\n",
    "    {\"input\": \"粗\", \"output\": \"细\"},\n",
    "    {\"input\": \"快乐\", \"output\": \"悲伤\"},\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "fd8d46e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "example_selector = MaxMarginalRelevanceExampleSelector.from_examples(\n",
    "    # 可供选择的示例列表。\n",
    "    examples,\n",
    "    # 嵌入类用于生成用于测量语义相似性的嵌入。\n",
    "    embeddings,\n",
    "    # VectorStore 类用于存储嵌入并进行相似性搜索。\n",
    "    FAISS,\n",
    "    # 要生成的示例数量。\n",
    "    k=2,\n",
    ")\n",
    "mmr_prompt = FewShotPromptTemplate(\n",
    "    # 提供一个ExampleSelector而不是示例。\n",
    "    example_selector=example_selector,\n",
    "    example_prompt=example_prompt,\n",
    "    prefix=\"给出每个输入的反义词\",\n",
    "    suffix=\"Input: {adjective}\\nOutput:\",\n",
    "    input_variables=[\"adjective\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "55a0e0fb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "给出每个输入的反义词\n",
      "\n",
      "Input: 粗\n",
      "Output: 细\n",
      "\n",
      "Input: 快乐\n",
      "Output: 悲伤\n",
      "\n",
      "Input: 担心\n",
      "Output:\n"
     ]
    }
   ],
   "source": [
    "# 输入是一种感觉，所以应该选择快乐/悲伤的例子作为第一个\n",
    "print(mmr_prompt.format(adjective=\"担心\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "ed086e6c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Output: 放松'"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chain = mmr_prompt | chat | output_parser\n",
    "\n",
    "chain.invoke({\"adjective\":\"担心\"})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4e852896",
   "metadata": {},
   "source": [
    "## 通过n-gram重叠选择\n",
    "\n",
    "NGramOverlapExampleSelector 根据 ngram 重叠得分，根据与输入最相似的示例来选择示例并对其进行排序。 ngram 重叠分数是 0.0 到 1.0 之间的浮点数（含 0.0 和 1.0）。\n",
    "\n",
    "选择器允许设置阈值分数。 ngram 重叠分数小于或等于阈值的示例被排除。默认情况下，阈值设置为 -1.0，因此不会排除任何示例，只会对它们重新排序。将阈值设置为 0.0 将排除与输入没有 ngram 重叠的示例。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "e4e41470",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.prompts import FewShotPromptTemplate, PromptTemplate\n",
    "from langchain.prompts.example_selector.ngram_overlap import NGramOverlapExampleSelector\n",
    "\n",
    "example_prompt = PromptTemplate(\n",
    "    input_variables=[\"input\", \"output\"],\n",
    "    template=\"Input: {input}\\nOutput: {output}\",\n",
    ")\n",
    "\n",
    "# 虚构翻译任务的示例。.\n",
    "examples = [\n",
    "    {\"input\": \"See Spot run.\", \"output\": \"请参阅现场运行。\"},\n",
    "    {\"input\": \"My dog barks.\", \"output\": \"我的狗吠叫。\"},\n",
    "    {\"input\": \"cat can run\", \"output\": \"猫会跑\"},\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "bfbe942d",
   "metadata": {},
   "outputs": [],
   "source": [
    "example_selector = NGramOverlapExampleSelector(\n",
    "    # 可供选择的示例。\n",
    "    examples=examples,\n",
    "    # PromptTemplate 用于格式化示例。\n",
    "    example_prompt=example_prompt,\n",
    "    # 选择器停止的阈值。\n",
    "    # 默认设置为-1.0。\n",
    "    threshold=-1.0,\n",
    "    # 对于负阈值：择器按 ngram 重叠分数对示例进行排序，并且不排除任何示例。\n",
    "    # 对于大于 1.0 的阈值：选择器排除所有示例，并返回一个空列表。\n",
    "    # 对于阈值等于 0.0:选择器按 ngram 重叠分数对示例进行排序，并排除那些与输入没有 ngram 重叠的内容。\n",
    ")\n",
    "dynamic_prompt = FewShotPromptTemplate(\n",
    "    # 提供了一个ExampleSelector而不是示例。\n",
    "    example_selector=example_selector,\n",
    "    example_prompt=example_prompt,\n",
    "    prefix=\"为每个输入提供中文翻译\",\n",
    "    suffix=\"Input: {sentence}\\nOutput:\",\n",
    "    input_variables=[\"sentence\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "09d40787",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "为每个输入提供中文翻译\n",
      "\n",
      "Input: cat can run\n",
      "Output: 猫会跑\n",
      "\n",
      "Input: See Spot run.\n",
      "Output: 请参阅现场运行。\n",
      "\n",
      "Input: My dog barks.\n",
      "Output: 我的狗吠叫。\n",
      "\n",
      "Input: cat can run fast.\n",
      "Output:\n"
     ]
    }
   ],
   "source": [
    "# 一个与“cat can run”有大量 ngram 重叠的示例输入。\n",
    "# 并且与“我的狗吠”没有重叠。\n",
    "print(dynamic_prompt.format(sentence=\"cat can run fast.\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "dd129833",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "为每个输入提供中文翻译\n",
      "\n",
      "Input: cat can run\n",
      "Output: 猫会跑\n",
      "\n",
      "Input: cat can run fast.\n",
      "Output:\n"
     ]
    }
   ],
   "source": [
    "# 您可以设置排除示例的阈值。\n",
    "# 例如，设置阈值等于0.0\n",
    "# 排除与输入没有 ngram 重叠的示例。\n",
    "# 自从“我的狗叫了。” 与“cat can run fast”没有 ngram 重叠。\n",
    "# 它被排除在外。\n",
    "example_selector.threshold = 0.0\n",
    "print(dynamic_prompt.format(sentence=\"cat can run fast.\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "016b5fd2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "为每个输入提供中文翻译\n",
      "\n",
      "Input: cat can run\n",
      "Output: 猫会跑\n",
      "\n",
      "Input: cat can play fetch.\n",
      "Output:\n"
     ]
    }
   ],
   "source": [
    "# 设置小的非零阈值\n",
    "example_selector.threshold = 0.09\n",
    "print(dynamic_prompt.format(sentence=\"cat can play fetch.\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "0bb65d99",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'猫会玩接球。'"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "from langchain_openai import ChatOpenAI\n",
    "openai_api_key = \"EMPTY\"\n",
    "openai_api_base = \"http://127.0.0.1:1234/v1\"\n",
    "chat = ChatOpenAI(\n",
    "    openai_api_key=openai_api_key,\n",
    "    openai_api_base=openai_api_base,\n",
    "    temperature=0.7,\n",
    ")\n",
    "\n",
    "output_parser = StrOutputParser()\n",
    "\n",
    "chain = dynamic_prompt | chat | output_parser\n",
    "\n",
    "chain.invoke({\"sentence\":\"cat can play fetch.\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "43554716",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
